#########################################################################
# FigureG1.R
#########################################################################

library(tidyverse)
library(stringi)
library(estimatr)

#########################################################################
# 1. Process Data
#########################################################################

# Ballot Order for US House
patternA <- "Dunleavy/Dahlstrom_Gara/Cook_Pierce/Grunwald_Walker/Drygas" # 1, 5, 9, ...
patternB <- "Gara/Cook_Pierce/Grunwald_Walker/Drygas_Dunleavy/Dahlstrom" # 2, 6, 10, ...
patternC <- "Pierce/Grunwald_Walker/Drygas_Dunleavy/Dahlstrom_Gara/Cook" # 3, 7, 11, ...
patternD <- "Walker/Drygas_Dunleavy/Dahlstrom_Gara/Cook_Pierce/Grunwald" # 4, 8, 12, ...


dt <- read_csv("cvr_Alaska_11082022_GovernorLieutenantGovernor.csv")

bt <- dt %>%
  mutate(district = str_extract(precinct, "[0-9]+")) %>%
  filter(grepl("District", precinct)) %>%
  mutate(ballot = case_when(
    district %in% seq(from = 1, to = 37, by = 4) ~ "type A",
    district %in% seq(from = 2, to = 38, by = 4) ~ "type B",
    district %in% seq(from = 3, to = 39, by = 4) ~ "type C",
    district %in% seq(from = 4, to = 40, by = 4) ~ "type D"
  )) %>%
  distinct(ballot_type, ballot)


df <- dt %>%
  mutate(district = as.numeric(str_extract(precinct, "[0-9]+"))) %>%
  mutate(
    rank1 = gsub(",.*$", "", rank1),
    rank2 = gsub(",.*$", "", rank2),
    rank3 = gsub(",.*$", "", rank3),
    rank4 = gsub(",.*$", "", rank4)
  ) %>%
  filter(
    !grepl("skipped", rank1),
    !grepl("skipped", rank2),
    !grepl("skipped", rank3),
    !grepl("skipped", rank4)
  ) %>%
  unite(pattern, rank1:rank4) %>%
  left_join(bt, by = "ballot_type") %>%
  mutate(
    donkey_A = ifelse(pattern == patternA, 1, 0),
    donkey_B = ifelse(pattern == patternB, 1, 0),
    donkey_C = ifelse(pattern == patternC, 1, 0),
    donkey_D = ifelse(pattern == patternD, 1, 0),
    treat_A = ifelse(ballot == "type A", 1, 0),
    treat_B = ifelse(ballot == "type B", 1, 0),
    treat_C = ifelse(ballot == "type C", 1, 0),
    treat_D = ifelse(ballot == "type D", 1, 0)
  ) %>%
  as_tibble()


#########################################################################
# 2. Estimate the Proportion of Donkey Voting
#########################################################################

m_df1 <- data.frame(
  Estimate = as.numeric(),
  Low = as.numeric(),
  High = as.numeric(),
  District = as.numeric()
)
m_df2 <- data.frame(
  Estimate = as.numeric(),
  Low = as.numeric(),
  High = as.numeric(),
  District = as.numeric()
)
m_df3 <- data.frame(
  Estimate = as.numeric(),
  Low = as.numeric(),
  High = as.numeric(),
  District = as.numeric()
)
m_df4 <- data.frame(
  Estimate = as.numeric(),
  Low = as.numeric(),
  High = as.numeric(),
  District = as.numeric()
)

for (i in 1:40) {
  m <- lm_robust(donkey_A ~ 1, df, subset = district == i)
  m_df1[i, "Estimate"] <- m$coefficients
  m_df1[i, "Low"] <- m$conf.low
  m_df1[i, "High"] <- m$conf.high
  m_df1[i, "District"] <- i

  m <- lm_robust(donkey_B ~ 1, df, subset = district == i)
  m_df2[i, "Estimate"] <- m$coefficients
  m_df2[i, "Low"] <- m$conf.low
  m_df2[i, "High"] <- m$conf.high
  m_df2[i, "District"] <- i

  m <- lm_robust(donkey_C ~ 1, df, subset = district == i)
  m_df3[i, "Estimate"] <- m$coefficients
  m_df3[i, "Low"] <- m$conf.low
  m_df3[i, "High"] <- m$conf.high
  m_df3[i, "District"] <- i

  m <- lm_robust(donkey_D ~ 1, df, subset = district == i)
  m_df4[i, "Estimate"] <- m$coefficients
  m_df4[i, "Low"] <- m$conf.low
  m_df4[i, "High"] <- m$conf.high
  m_df4[i, "District"] <- i
}


m_df1 %>%
  mutate(treated = ifelse(District %in% seq(from = 1, to = 37, by = 4), 1, 0)) %>%
  group_by(treated) %>%
  summarize(DID = mean(Estimate))

m_df2 %>%
  mutate(treated = ifelse(District %in% seq(from = 2, to = 38, by = 4), 1, 0)) %>%
  group_by(treated) %>%
  summarize(DID = mean(Estimate))

m_df3 %>%
  mutate(treated = ifelse(District %in% seq(from = 3, to = 39, by = 4), 1, 0)) %>%
  group_by(treated) %>%
  summarize(DID = mean(Estimate))

m_df4 %>%
  mutate(treated = ifelse(District %in% seq(from = 4, to = 40, by = 4), 1, 0)) %>%
  group_by(treated) %>%
  summarize(DID = mean(Estimate))

ate_A <- lm_robust(donkey_A ~ treat_A, df)
ate_B <- lm_robust(donkey_B ~ treat_B, df)
ate_C <- lm_robust(donkey_C ~ treat_C, df)
ate_D <- lm_robust(donkey_D ~ treat_D, df)


#########################################################################
# 3. Generate Figure G2
#########################################################################

pdf("FigureG2.pdf", width = 6.5, height = 6.3)

par(
  mfrow = c(2, 2),
  mar = c(4, 4, 2, 1)
)


plot(m_df1$Estimate ~ m_df1$District,
  pch = 16, col = "gray",
  ylab = "Proportion",
  xlab = "District",
  main = "",
  ylim = c(0, 0.2),
  mgp = c(2, 0.7, 0)
)
title("A. Voting for \nDunleavy > Gara > Pierce > Walker",
  cex.main = 0.9, adj = 0
)

arrows(
  x0 = m_df1$District, x1 = m_df1$District,
  y0 = m_df1$Low, y1 = m_df1$High, col = "gray",
  length = 0
)
points(
  m_df1$Estimate[m_df1$District %in% seq(from = 1, to = 37, by = 4)] ~
    m_df1$District[m_df1$District %in% seq(from = 1, to = 37, by = 4)],
  col = "darkred", pch = 16,
)
text(
  x = seq(from = 1, to = 37, by = 4),
  y = m_df1$High[m_df1$District %in% seq(from = 1, to = 37, by = 4)] + 0.01,
  seq(from = 1, to = 37, by = 4),
  col = "darkred", cex = 0.8
)
arrows(
  x0 = m_df1$District[m_df1$District %in% seq(from = 1, to = 37, by = 4)],
  x1 = m_df1$District[m_df1$District %in% seq(from = 1, to = 37, by = 4)],
  y0 = m_df1$Low[m_df1$District %in% seq(from = 1, to = 37, by = 4)],
  y1 = m_df1$High[m_df1$District %in% seq(from = 1, to = 37, by = 4)],
  col = "darkred",
  length = 0
)
text(10, 0.2 * 0.9,
  labels = paste0("avg. diff = ", round(ate_A$coefficients[2], 3)),
  font = 1, col = "darkred"
)
abline(h = ate_A$coefficients[1], col = "gray", lwd = 0.8)
rect(-1, 0, 42, ate_A$coefficients[1], col = alpha("gray80", 0.3), lty = 0)


plot(m_df2$Estimate ~ m_df2$District,
  pch = 16, col = "gray",
  ylab = "Proportion",
  xlab = "District",
  main = "",
  ylim = c(0, 0.1),
  mgp = c(2, 0.7, 0)
)
title("B. Voting for \nGara > Pierce > Walker > Dunleavy",
  cex.main = 0.9, adj = 0
)


arrows(
  x0 = m_df2$District, x1 = m_df2$District,
  y0 = m_df2$Low, y1 = m_df2$High, col = "gray",
  length = 0
)
points(
  m_df2$Estimate[m_df2$District %in% seq(from = 2, to = 38, by = 4)] ~
    m_df2$District[m_df2$District %in% seq(from = 2, to = 38, by = 4)],
  col = "darkred", pch = 16,
)
text(
  x = seq(from = 2, to = 38, by = 4),
  y = m_df2$High[m_df2$District %in% seq(from = 2, to = 38, by = 4)] + 0.0025,
  seq(from = 2, to = 38, by = 4),
  col = "darkred", cex = 0.8
)
arrows(
  x0 = m_df2$District[m_df2$District %in% seq(from = 2, to = 38, by = 4)],
  x1 = m_df2$District[m_df2$District %in% seq(from = 2, to = 38, by = 4)],
  y0 = m_df2$Low[m_df2$District %in% seq(from = 2, to = 38, by = 4)],
  y1 = m_df2$High[m_df2$District %in% seq(from = 2, to = 38, by = 4)],
  col = "darkred",
  length = 0
)
text(10, 0.1 * 0.9,
  labels = paste0("avg. diff = ", round(ate_B$coefficients[2], 3)),
  font = 1, col = "darkred"
)
abline(h = ate_B$coefficients[1], col = "gray", lwd = 0.8)
rect(-1, 0, 42, ate_B$coefficients[1], col = alpha("gray80", 0.3), lty = 0)



plot(m_df3$Estimate ~ m_df3$District,
  pch = 16, col = "gray",
  ylab = "Proportion",
  xlab = "District",
  main = "",
  ylim = c(0, 0.05),
  mgp = c(2, 0.7, 0)
)
title("C. Voting for \nPierce > Walker > Dunleavy > Gara",
  cex.main = 0.9, adj = 0
)


arrows(
  x0 = m_df3$District, x1 = m_df3$District,
  y0 = m_df3$Low, y1 = m_df3$High, col = "gray",
  length = 0
)
points(
  m_df3$Estimate[m_df3$District %in% seq(from = 3, to = 39, by = 4)] ~
    m_df3$District[m_df3$District %in% seq(from = 3, to = 39, by = 4)],
  col = "darkred", pch = 16,
)
text(
  x = seq(from = 3, to = 39, by = 4),
  y = m_df3$High[m_df3$District %in% seq(from = 3, to = 39, by = 4)] + 0.0025,
  seq(from = 3, to = 39, by = 4),
  col = "darkred", cex = 0.8
)
arrows(
  x0 = m_df3$District[m_df3$District %in% seq(from = 3, to = 39, by = 4)],
  x1 = m_df3$District[m_df3$District %in% seq(from = 3, to = 39, by = 4)],
  y0 = m_df3$Low[m_df3$District %in% seq(from = 3, to = 39, by = 4)],
  y1 = m_df3$High[m_df3$District %in% seq(from = 3, to = 39, by = 4)],
  col = "darkred",
  length = 0
)
text(10, 0.05 * 0.9,
  labels = paste0("avg. diff = ", round(ate_C$coefficients[2], 3)),
  font = 1, col = "darkred"
)
abline(h = ate_C$coefficients[1], col = "gray", lwd = 0.8)
rect(-1, 0, 42, ate_C$coefficients[1], col = alpha("gray80", 0.3), lty = 0)



plot(m_df4$Estimate ~ m_df4$District,
  pch = 16, col = "gray",
  ylab = "Proportion",
  xlab = "District",
  main = "",
  ylim = c(0, 0.2),
  mgp = c(2, 0.7, 0)
)
title("C. Voting for \nWalker > Dunleavy > Gara > Pierce",
  cex.main = 0.9, adj = 0
)


arrows(
  x0 = m_df4$District, x1 = m_df4$District,
  y0 = m_df4$Low, y1 = m_df4$High, col = "gray",
  length = 0
)
points(
  m_df4$Estimate[m_df4$District %in% seq(from = 4, to = 40, by = 4)] ~
    m_df4$District[m_df4$District %in% seq(from = 4, to = 40, by = 4)],
  col = "darkred", pch = 16,
)
text(
  x = seq(from = 4, to = 40, by = 4),
  y = m_df4$High[m_df4$District %in% seq(from = 4, to = 40, by = 4)] + 0.01,
  seq(from = 4, to = 40, by = 4),
  col = "darkred", cex = 0.8
)
arrows(
  x0 = m_df4$District[m_df4$District %in% seq(from = 4, to = 40, by = 4)],
  x1 = m_df4$District[m_df4$District %in% seq(from = 4, to = 40, by = 4)],
  y0 = m_df4$Low[m_df4$District %in% seq(from = 4, to = 40, by = 4)],
  y1 = m_df4$High[m_df4$District %in% seq(from = 4, to = 40, by = 4)],
  col = "darkred",
  length = 0
)
text(10, 0.2 * 0.9,
  labels = paste0("avg. diff = ", round(ate_D$coefficients[2], 3)),
  font = 1, col = "darkred"
)
abline(h = ate_D$coefficients[1], col = "gray", lwd = 0.8)
rect(-1, 0, 42, ate_D$coefficients[1], col = alpha("gray80", 0.3), lty = 0)


dev.off()

#############################################################################
# END OF THIS R SOURCE FILE
#############################################################################
